{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  在 MLFlow 上运行 Phi-3 模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "本笔记本以 Phi-3 mini 4K Instruct 为例，介绍了如何使用 Transformer pipeline。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入所需的库\n",
    "import mlflow\n",
    "import transformers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selecting Phi-3 Mini 4K from HuggingFace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9f1bb22f43e44552881ddca9ec96f20d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "# 在 HuggingFace 上选择 Phi-3 模型\n",
    "pipeline = transformers.pipeline(\n",
    "    task = \"text-generation\",\n",
    "    model = \"microsoft/Phi-3-mini-4k-instruct\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 生成 MLFlow 工件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 设置 MLflow 模型配置\n",
    "model_config = {\n",
    "    \"max_length\": 300,\n",
    "    \"truncation\": True,\n",
    "    \"include_prompt\": False,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  从 HuggingFace model 生成 MLflow 工件\n",
    "with mlflow.start_run():\n",
    "    model_info = mlflow.transformers.log_model(\n",
    "        transformers_model = pipeline,\n",
    "        artifact_path = \"phi3-mlflow-model\",\n",
    "        model_config = model_config,\n",
    "        task = \"llm/v1/chat\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 作为 MLFlow 模型运行 Phi-3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7e3557733e55422a81b3a122072830e6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/34 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "# Loading Phi-3 MLFlow model\n",
    "loaded_model = mlflow.pyfunc.load_model(\n",
    "    model_uri = model_info.model_uri\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "inputs: \n",
       "  ['messages': Array({content: string (required), name: string (optional), role: string (required)}) (required), 'temperature': double (optional), 'max_tokens': long (optional), 'stop': Array(string) (optional), 'n': long (optional), 'stream': boolean (optional)]\n",
       "outputs: \n",
       "  ['id': string (required), 'object': string (required), 'created': long (required), 'model': string (required), 'choices': Array({finish_reason: string (required), index: long (required), message: {content: string (required), name: string (optional), role: string (required)} (required)}) (required), 'usage': {completion_tokens: long (required), prompt_tokens: long (required), total_tokens: long (required)} (required)]\n",
       "params: \n",
       "  None"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "loaded_model.metadata.signature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are not running the flash-attention implementation, expect numerical differences.\n",
      "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'id': 'f9c2ca5c-b684-4a77-8a43-356b671fd797', 'object': 'chat.completion', 'created': 1718709547, 'model': 'microsoft/Phi-3-mini-4k-instruct', 'usage': {'prompt_tokens': 11, 'completion_tokens': 73, 'total_tokens': 84}, 'choices': [{'index': 0, 'finish_reason': 'stop', 'message': {'role': 'assistant', 'content': 'The capital of Spain is Madrid. It is the largest city in Spain and serves as the political, economic, and cultural center of the country. Madrid is located in the center of the Iberian Peninsula and is known for its rich history, art, and architecture, including the Royal Palace, the Prado Museum, and the Plaza Mayor.'}}]}]\n"
     ]
    }
   ],
   "source": [
    "# 测试 MLFlow Phi-3 模型的推理能力\n",
    "messages = [{\"role\": \"user\", \"content\": \"What is the capital of Spain?\"}]\n",
    "response = loaded_model.predict(\n",
    "    {\"messages\": messages}\n",
    ")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Question: What is the capital of Spain?\n",
      "\n",
      "Response: The capital of Spain is Madrid. It is the largest city in Spain and serves as the political, economic, and cultural center of the country. Madrid is located in the center of the Iberian Peninsula and is known for its rich history, art, and architecture, including the Royal Palace, the Prado Museum, and the Plaza Mayor.\n",
      "\n",
      "Usage: {'prompt_tokens': 11, 'completion_tokens': 73, 'total_tokens': 84}\n"
     ]
    }
   ],
   "source": [
    "# \"美化 \"输出\n",
    "print(f\"Question: {messages[0]['content']}\\n\")\n",
    "print(f\"Response: {response[0]['choices'][0]['message']['content']}\\n\")\n",
    "print(f\"Usage: {response[0]['usage']}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
